# -------------------------------------------------------------------
#       -*- coding: utf-8 -*-
#   @Project    :   spider_biqushen
#   @File       :   biqugek.py
#   @Author     :   WANGYU
#   @Time       :   2021-09-03 10:56:32
#   @Software   :   PyCharm
#   @Desc       :   
# -------------------------------------------------------------------


import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


def get_html(novel_url, timeout=10):
    """Fetch a page with a desktop-browser User-Agent header.

    Args:
        novel_url: URL of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        The ``requests.Response`` object, with ``encoding`` set to ``'utf-8'``.
        The HTTP status is not checked here; callers receive error pages too.

    Raises:
        requests.exceptions.RequestException: propagated unchanged from
            ``requests.get`` (connection failure, timeout, ...).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
                      ' (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
    }
    response = requests.get(novel_url, headers=headers, timeout=timeout)
    # Override the detected charset so ``response.text`` is decoded as UTF-8.
    # NOTE(review): assumes the site serves UTF-8 pages - confirm.
    response.encoding = 'utf-8'
    return response


def _chapter_text(page):
    """Return the text of the page's ``div#txt`` element, or None if absent."""
    container = page.find('div', attrs={'id': 'txt'})
    return container.text if container is not None else None


def _next_chapter_url(page, current_url):
    """Return the absolute URL of the third link in ``div.chapter-control``.

    Returns None when the container, the link or its ``href`` is missing, or
    when the resolved target is not an http(s) URL.
    """
    controls = page.find('div', attrs={'class': 'chapter-control'})
    if controls is None:
        return None
    links = controls.findAll('a')
    if len(links) < 3:
        return None
    href = links[2].get('href')
    if not href:
        return None
    # urljoin resolves site-relative paths such as '/26/26198/x.html' against
    # the current page and leaves already-absolute URLs untouched.
    target = urljoin(current_url, href)
    if not target.startswith(('http://', 'https://')):
        return None
    return target


def get_novel(start_url='https://www.biqugek.cc/26/26198/7348318.html',
              output_dir='./files/', delay=2):
    """Download consecutive pages starting at ``start_url`` into one text file.

    The text of every page's ``div#txt`` is printed and appended to
    ``<output_dir>/<title>.txt``. The crawl follows the third link of each
    page's ``div.chapter-control`` and stops when a page has no text
    container, has no usable next link, or links to a page already visited.

    Args:
        start_url: URL of the first page to download.
        output_dir: Directory for the output file; created if missing.
        delay: Seconds to sleep before each follow-up request.

    Raises:
        AttributeError, IndexError: if the first page lacks the
            ``div.chapter-nav`` structure the title is read from.
        Exceptions raised by ``get_html`` propagate unchanged.
    """
    page = BeautifulSoup(get_html(start_url).text, 'html.parser')

    # The output file is named after the text of the second link inside the
    # first <p> of div.chapter-nav (presumably the book title - confirm).
    title = page.find('div', attrs={'class': 'chapter-nav'}).find('p').findAll('a')[1].string
    print(title)

    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, title + '.txt')

    current_url = start_url
    visited = {start_url}
    with open(out_path, 'w', encoding='utf-8') as novel:
        # Crawl page after page until there is nothing left to follow.
        while True:
            text = _chapter_text(page)
            if text is None:
                # Not a chapter page (e.g. an index or error page): stop.
                break
            print(text)
            novel.write(text)
            # Flush per page so an interrupted crawl keeps what it fetched.
            novel.flush()

            next_url = _next_chapter_url(page, current_url)
            print(next_url)
            if next_url is None or next_url in visited:
                # No next page, or a link cycle that would loop forever.
                break
            visited.add(next_url)

            time.sleep(delay)
            page = BeautifulSoup(get_html(next_url).text, 'html.parser')
            current_url = next_url


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    get_novel()
